library(tidyverse) # data manipulation
library(ggpubr) # producing data exploratory plots
library(modelsummary) # descriptive data
library(glmmTMB) # running generalised mixed models
library(DHARMa) # model diagnostics
library(performance) # model diagnostics
library(ggeffects) # partial effect plots
library(car) # running Anova on model
library(emmeans) # post-hoc analysis

# Convert the identifier / grouping columns of the adult data to factors.
# The assignment used to sit on the same line as the comment above, which
# turned it into comment text and left the mutate() call without any data.
df_adults_cleaned <- df_adults |>
  mutate(across(
    c(FISH_ID, Sex, Population, Tank, Chamber, System, Temperature,
      True_resting),
    factor
  ))
# Split the cleaned adult data by the Sex factor: "M" rows and "F" rows.
df_males <- filter(df_adults_cleaned, Sex == "M")
df_females <- filter(df_adults_cleaned, Sex == "F")
# Pair males and females that share a Tank. After the join the male columns
# carry the ".x" suffix and the female columns the ".y" suffix.
# full_join() keeps tanks that have only one sex, so Temperature.x, FISH_ID.x
# and Sex.x are back-filled from the female side where the male side is
# missing. The midpoints are plain means of the two parents and are therefore
# NA whenever either parent's value is missing.
df_adults_cleaned2 <- df_males |>
  full_join(
    select(df_females, c("Tank", "Temperature", "Mass", "Resting", "Max",
                         "AAS", "FISH_ID", "Sex")),
    by = "Tank"
  ) |>
  mutate(
    Temperature.x = coalesce(Temperature.x, Temperature.y),
    FISH_ID.x = coalesce(FISH_ID.x, FISH_ID.y),
    Sex.x = coalesce(Sex.x, Sex.y),
    Resting.midpoint = (Resting.x + Resting.y) / 2,
    Max.midpoint = (Max.x + Max.y) / 2,
    AAS.midpoint = (AAS.x + AAS.y) / 2
  )

# Harmonise spelling variants of the population names in the juvenile data.
# (This statement used to be fused onto the last line of the pipeline above.)
df_jresp$Population <- fct_collapse(
  df_jresp$Population,
  `Vlassof cay` = c("Vlassof reef", "Vlassof", "Vlassof Cay", "Vlassof cay"),
  `Arlington reef` = c("Arlington reef", "Arlginton reef")
)
#df_jresp$Female <- fct_collapse(df_jresp$Female,
#`CARL359`= c("CARL359", "CARL59"))
# Build a parental-pair identifier F0 ("<Male>_<Female>") while keeping the
# original Male and Female columns, then convert the first seven columns
# (by position) plus Temperature and True_resting to factors.
df_jresp2 <- unite(df_jresp, col = "F0", c("Male", "Female"),
                   sep = "_", remove = FALSE)
df_jresp2 <- mutate(
  df_jresp2,
  across(1:7, factor),
  Temperature = factor(Temperature),
  True_resting = factor(True_resting)
)
#df_jresp2_rest <- df_jresp2 |>
#filter(True_resting == "Y")

# Attach the male-side (".x") adult measurements to the offspring records in
# temp1a, matching on FISH_ID.x.
temp2a <- temp1a |>
  left_join(
    select(df_adults_cleaned2, c("FISH_ID.x", "Sex.x", "Resting.x", "Max.x",
                                 "AAS.x", "Mass.x")),
    by = "FISH_ID.x"
  )

# Attach the female-side (".y") adult measurements to the offspring records in
# temp1b, matching on FISH_ID.y.
temp2b <- temp1b |>
  left_join(
    select(df_adults_cleaned2, c("FISH_ID.y", "Sex.y", "Resting.y", "Max.y",
                                 "AAS.y", "Mass.y")),
    by = "FISH_ID.y"
  )

# Combine both parents onto one row per offspring record (Clutch x Replicate).
df_merged <- temp2a |>
  left_join(
    select(temp2b, c("Clutch", "Replicate", "FISH_ID.y", "Resting.y", "Max.y",
                     "AAS.y", "Mass.y")),
    by = c("Clutch", "Replicate")
  )

# Analysis data set: parental values renamed by sex, parental Max divided by
# parental Mass, a parental midpoint, and the per-clutch median of offspring
# Max_kg_wet.
df <- df_merged |>
  mutate(
    Resting_MALE = Resting.x,
    Max_MALE = Max.x,
    AAS_MALE = AAS.x,
    Mass_MALE = Mass.x,
    # NOTE(review): this overwrites FISH_ID.y with the male-side id. The
    # original comment read "makes more sense for males to be .y instead of
    # .x" and was followed by a no-op `FISH_ID.x = FISH_ID.x` (removed here).
    # If a swap of the two ids was intended, this needs revisiting.
    FISH_ID.y = FISH_ID.x,
    Resting_FEMALE = Resting.y,
    Max_FEMALE = Max.y,
    AAS_FEMALE = AAS.y,
    Mass_FEMALE = Mass.y
  ) |>
  mutate(
    Max_MALE = Max_MALE / Mass_MALE,
    Max_FEMALE = Max_FEMALE / Mass_FEMALE
  ) |>
  # Midpoint of both parents; falls back to the male value, then the female
  # value, when only one parent was measured.
  mutate(
    Max_MID = coalesce((Max_MALE + Max_FEMALE) / 2, Max_MALE, Max_FEMALE)
  ) |>
  drop_na(Max) |>
  group_by(Clutch) |>
  mutate(MEDIAN_Max = median(Max_kg_wet)) |>
  ungroup() |>
  # NOTE(review): columns 18:26 are dropped by position, which depends on the
  # exact column order of df_merged -- confirm if upstream columns change.
  select(-c(Replicate, Chamber, System, Volume, Date_tested, Swim, Mass,
            Dry_mass, 18:26)) |>
  distinct() |>
  drop_na(Max_MID)

# Exploratory plot: offspring clutch median (y) against the male parent (x).
# The axis labels were previously swapped relative to the aesthetics.
plot <- ggplot(df, aes(x = Max_MALE, y = MEDIAN_Max, color = Temperature)) +
  stat_smooth(method = "lm") +
  #geom_point(alpha=0.1) +
  ggtitle("Offspring-male relationship") +
  xlab("Max (parental-male)") +
  ylab("Max (offspring)") +
  theme_classic() +
  theme(legend.position = "right")
plot

# Exploratory plot: offspring clutch median (y) against the parental midpoint
# (x).
plot <- ggplot(df, aes(x = Max_MID, y = MEDIAN_Max, color = Temperature)) +
  stat_smooth(method = "lm") +
  #geom_point(alpha=0.1) +
  ggtitle("Offspring-midpoint relationship") +
  ylab("Max (offspring)") +
  xlab("Max (parental-midpoint)") +
  theme_classic() +
  theme(legend.position = "right")
plot
| Population | 27 | 28.5 | 30 |
|---|---|---|---|
| Arlington reef | 11 | 7 | 3 |
| Pretty patches | 5 | 3 | 5 |
| Sudbury reef | 5 | 2 | 2 |
| Vlassof cay | 5 | 0 | 5 |
| F0 | 27 | 28.5 | 30 |
|---|---|---|---|
| CARL217_CARL226 | 0 | 1 | 0 |
| CARL218_CARL222 | 0 | 0 | 2 |
| CARL230_CARL235 | 4 | 0 | 0 |
| CARL233_CARL215 | 0 | 0 | 0 |
| CARL237_CARL219 | 2 | 0 | 0 |
| CARL241_CARL239 | 2 | 0 | 0 |
| CARL249_CARL360 | 0 | 0 | 1 |
| CARL335_CARL359 | 0 | 3 | 0 |
| CARL338_CARL345 | 0 | 1 | 0 |
| CARL344_CARL370 | 0 | 0 | 0 |
| CARL354_CARL355 | 3 | 0 | 0 |
| CARL360_CARL249 | 0 | 0 | 0 |
| CARL367_CARL363 | 0 | 1 | 0 |
| CARL369_CARL349 | 0 | 1 | 0 |
| CPRE189_CPRE202 | 0 | 0 | 2 |
| CPRE372_CPRE209 | 1 | 0 | 0 |
| CPRE372_CPRE370 | 1 | 0 | 0 |
| CPRE375_CPRE377 | 3 | 0 | 0 |
| CPRE391_CPRE390 | 0 | 0 | 1 |
| CPRE447_CPRE452 | 0 | 0 | 2 |
| CPRE453_CPRE459 | 0 | 1 | 0 |
| CPRE521_CPRE524 | 0 | 1 | 0 |
| CPRE550_CPRE533 | 0 | 1 | 0 |
| CSUD002_CSUD213 | 0 | 1 | 0 |
| CSUD009_CSUD212 | 3 | 0 | 0 |
| CSUD013_CSUD017 | 2 | 0 | 0 |
| CSUD016_CSUD078 | 0 | 1 | 0 |
| CSUD312_CSUD304 | 0 | 0 | 2 |
| CVLA049_CVLA098 | 0 | 0 | 0 |
| CVLA089_CVLA059 | 0 | 0 | 1 |
| CVLA102_CVLA466 | 1 | 0 | 0 |
| CVLA106_CVLA091 | 0 | 0 | 3 |
| CVLA468_CVLA477 | 3 | 0 | 0 |
| CVLA486_CVLA463 | 1 | 0 | 0 |
| CVLA498_CVLA493 | 0 | 0 | 1 |
| Temperature | NUnique | mean | median | min | max | sd | Histogram |
|---|---|---|---|---|---|---|---|
| 27 | 21 | 926.01 | 922.96 | 742.12 | 1128.22 | 102.09 | ▁▂▆▂▇▅▂▁▃ |
| 28.5 | 11 | 1059.28 | 1043.96 | 946.66 | 1211.16 | 86.34 | ▅▂▇▂▅▂▅ |
| 30 | 14 | 1178.44 | 1146.47 | 916.48 | 1459.40 | 159.66 | ▂▂▂▃▇▃▂▅ |
| Population | 27 | 28.5 | 30 |
|---|---|---|---|
| Arlington reef | 8 | 7 | 4 |
| Pretty patches | 4 | 6 | 4 |
| Sudbury reef | 4 | 3 | 2 |
| Vlassof cay | 6 | 2 | 5 |
# Count table: Population in rows, Temperature (outer) by Sex (inner) in
# columns, formatted with no decimals.
datasummary(
  Factor(Population) ~ Factor(Temperature) * Factor(Sex),
  data = df_adults_cleaned,
  fmt = "%.0f"
)
| | 27 | | 28.5 | | 30 | |
|---|---|---|---|---|---|---|
| Population | F | M | F | M | F | M |
| Arlington reef | 4 | 4 | 2 | 5 | 2 | 2 |
| Pretty patches | 2 | 2 | 3 | 3 | 3 | 1 |
| Sudbury reef | 2 | 2 | 1 | 2 | 1 | 1 |
| Vlassof cay | 3 | 3 | 1 | 1 | 3 | 2 |
Pairs
# Number of unique Max.x values per Population x Temperature.x cell of the
# paired adult data, formatted with no decimals.
datasummary(
  Factor(Population) * Factor(Temperature.x) ~ Max.x * (NUnique),
  data = df_adults_cleaned2,
  fmt = "%.0f"
)
| Population | Temperature.x | NUnique |
|---|---|---|
| Arlington reef | 27 | 4 |
| | 28.5 | 5 |
| | 30 | 2 |
| Pretty patches | 27 | 2 |
| | 28.5 | 3 |
| | 30 | 1 |
| Sudbury reef | 27 | 2 |
| | 28.5 | 2 |
| | 30 | 1 |
| Vlassof cay | 27 | 3 |
| | 28.5 | 1 |
| | 30 | 2 |
| Temperature | NUnique | mean | median | min | max | sd | Histogram |
|---|---|---|---|---|---|---|---|
| 27 | 22 | 16.58 | 16.91 | 9.70 | 22.06 | 3.36 | ▃▃▅▅▃▃▅▇▂ |
| 28.5 | 18 | 17.09 | 17.23 | 11.04 | 28.39 | 3.94 | ▅▂▅▇▇▃▂ |
| 30 | 12 | 16.80 | 17.35 | 11.78 | 21.24 | 3.03 | ▂▅▂▇▂▂▅▂ |
After figuring out which random factors will be incorporated into the model, we will start to examine our fixed factors. Some fixed factors, such as Max_(FE)MALE and TEMPERATURE, will be essential to answering the questions we have around heritability. Another factor that will be included is Dry_mass, which, it should be pointed out, in this experiment refers to the mass of fish after they were blotted dry with paper towel rather than completely dried out. Larger fish consume more oxygen; therefore, we need to account for this known relationship within our model. Our model will look something like this:
If we had alternative hypotheses to test, we would do so at this stage. In this instance, however, the experiment was designed to answer a specific question by limiting potential covariates.
Great, now let's check how our model performed using model validation techniques.
To check our model's performance we will be using two different packages that perform model diagnostics. The packages used here are just examples; there are other packages out there that provide the same functionality.
## Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
##
## Scaled residual values: 0.064 0.22 0.548 0.088 0.248 0.12 0.16 0.116 0.136 0.476 0.408 0.744 0.344 0.276 0.584 0.568 0.628 0.66 0.464 0.988 ...
## $uniformity
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.093778, p-value = 0.8236
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0211, p-value = 0.816
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: simulationOutput
## outliers at both margin(s) = 0, observations = 45, p-value = 1
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.0000000 0.0787051
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0
## $uniformity
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.093778, p-value = 0.8236
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0211, p-value = 0.816
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: simulationOutput
## outliers at both margin(s) = 0, observations = 45, p-value = 1
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.0000000 0.0787051
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0
## Family: gaussian ( identity )
## Formula: MEDIAN_Max ~ scale(Max_MALE) * Temperature
## Data: df
##
## AIC BIC logLik deviance df.resid
## 562.7 575.3 -274.3 548.7 38
##
##
## Dispersion estimate for gaussian family (sigma^2): 1.16e+04
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 928.472 26.418 35.15 < 2e-16 ***
## scale(Max_MALE) 5.018 32.491 0.15 0.877255
## Temperature28.5 142.592 41.699 3.42 0.000627 ***
## Temperature30 296.210 128.042 2.31 0.020701 *
## scale(Max_MALE):Temperature28.5 -46.463 44.978 -1.03 0.301598
## scale(Max_MALE):Temperature30 -17.517 94.662 -0.19 0.853189
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2.5 % 97.5 % Estimate
## (Intercept) 876.69439 980.24979 928.472088
## scale(Max_MALE) -58.66269 68.69915 5.018226
## Temperature28.5 60.86327 224.32114 142.592206
## Temperature30 45.25298 547.16674 296.209861
## scale(Max_MALE):Temperature28.5 -134.61959 41.69285 -46.463373
## scale(Max_MALE):Temperature30 -203.05049 168.01613 -17.517180
# Estimated marginal means for each Temperature level and all pairwise
# contrasts between them, with Sidak-adjusted intervals and tests.
summary(
  emmeans(model1.1, pairwise ~ Temperature, type = "response"),
  by = NULL,
  adjust = "sidak",
  infer = TRUE
)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## Temperature emmean SE df lower.CL upper.CL t.ratio p.value
## 27 928 26.4 38 862 994 35.146 <.0001
## 28.5 1071 32.3 38 990 1152 33.197 <.0001
## 30 1225 125.3 38 912 1538 9.775 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
## P value adjustment: sidak method for 3 tests
##
## $contrasts
## contrast estimate SE df lower.CL upper.CL t.ratio
## Temperature27 - Temperature28.5 -143 41.7 38 -247 -38.4 -3.420
## Temperature27 - Temperature30 -296 128.0 38 -616 23.6 -2.313
## Temperature28.5 - Temperature30 -154 129.4 38 -477 169.5 -1.187
## p.value
## 0.0045
## 0.0766
## 0.5652
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
## P value adjustment: sidak method for 3 tests
# Model-based estimates of MEDIAN_Max over a grid of Max_MALE values
# (300 to 550 in steps of 5) for every Temperature level.
om.max <- emmeans(model1.1, ~ Max_MALE * Temperature,
                  at = list(Max_MALE = seq(from = 300, to = 550, by = 5)))
om.max.df <- as.data.frame(om.max)

# Observation-level values rebuilt as prediction + response residual.
# NOTE(review): predict() and residuals() are called without newdata, so this
# assumes the rows kept by drop_na() are exactly the rows model1.1 was fitted
# to, in the same order -- confirm.
om.max.obs <- drop_na(df, Max_MALE, MEDIAN_Max) |>
  mutate(Pred = predict(model1.1, re.form = NA, type = "response"),
         Resid = residuals(model1.1, type = "response"),
         Fit = Pred + Resid)

# Per Clutch x Temperature summary with a 95% t-interval.
# Groups with a single row have no standard deviation; their degrees of
# freedom are set to NA so the interval bounds come out as NA without qt()
# raising a "NaNs produced" warning for every such group (qt(df = 0) did).
# `.groups = "drop"` returns an ungrouped tibble and silences the summarise()
# regrouping message.
om.max.obs.summarize <- om.max.obs |>
  group_by(Clutch, Temperature) |>
  summarise(mean.max = mean(Fit, na.rm = TRUE),
            mean.max_male = mean(Max_MALE, na.rm = TRUE),
            sd.max = sd(Fit, na.rm = TRUE),
            n.max = n(),
            .groups = "drop") |>
  mutate(se.max = sd.max / sqrt(n.max),
         lower.ci.max = mean.max -
           qt(1 - (0.05 / 2), replace(n.max - 1, n.max < 2, NA)) * se.max,
         upper.ci.max = mean.max +
           qt(1 - (0.05 / 2), replace(n.max - 1, n.max < 2, NA)) * se.max)
# Partial-effect plot for the male-parent model, one panel per Temperature.
# The line and ribbon are the emmeans estimate and its confidence limits
# (emmean, lower.CL, upper.CL). Previously stat_smooth(method = "lm") refitted
# a straight line through the prediction grid, so its band described that
# refit rather than the model's uncertainty.
# Points are the per-clutch summaries; clutches with a single row have NA
# interval bounds and are dropped from the range layer with a warning.
ggplot(data = om.max.df, aes(y = emmean, x = Max_MALE)) +
  geom_ribbon(aes(ymin = lower.CL, ymax = upper.CL, fill = Temperature),
              alpha = 0.2) +
  geom_line(aes(color = Temperature)) +
  geom_pointrange(data = om.max.obs.summarize,
                  aes(y = mean.max, x = mean.max_male,
                      ymin = lower.ci.max,
                      ymax = upper.ci.max, color = Temperature),
                  alpha = 0.2) +
  facet_wrap(~Temperature) +
  theme_classic() +
  theme(legend.position = "bottom")
## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_segment()`).
Great, now let's check how our model performed using model validation techniques.
To check our model's performance we will be using two different packages that perform model diagnostics. The packages used here are just examples; there are other packages out there that provide the same functionality.
## Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
##
## Scaled residual values: 0.044 0.18 0.572 0.196 0.108 0.3 0.252 0.148 0 0.112 0.18 0.176 0.472 0.448 0.772 0.528 0.38 0.4 0.56 0.636 ...
## $uniformity
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.10725, p-value = 0.5757
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0199, p-value = 0.824
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: simulationOutput
## outliers at both margin(s) = 1, observations = 53, p-value = 0.3456
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.0004775804 0.1007015268
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0.01886792
## $uniformity
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.10725, p-value = 0.5757
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0199, p-value = 0.824
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: simulationOutput
## outliers at both margin(s) = 1, observations = 53, p-value = 0.3456
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.0004775804 0.1007015268
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0.01886792
## Family: gaussian ( identity )
## Formula: MEDIAN_Max ~ scale(Max_MID) * Temperature
## Data: df
##
## AIC BIC logLik deviance df.resid
## 665.4 679.2 -325.7 651.4 46
##
##
## Dispersion estimate for gaussian family (sigma^2): 1.27e+04
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 930.561 23.792 39.11 < 2e-16 ***
## scale(Max_MID) 12.824 24.658 0.52 0.60301
## Temperature28.5 133.571 40.524 3.30 0.00098 ***
## Temperature30 243.039 42.467 5.72 1.05e-08 ***
## scale(Max_MID):Temperature28.5 -49.182 37.955 -1.30 0.19505
## scale(Max_MID):Temperature30 -3.299 46.033 -0.07 0.94286
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2.5 % 97.5 % Estimate
## (Intercept) 883.92997 977.19288 930.561420
## scale(Max_MID) -35.50429 61.15177 12.823740
## Temperature28.5 54.14541 212.99604 133.570727
## Temperature30 159.80482 326.27245 243.038635
## scale(Max_MID):Temperature28.5 -123.57303 25.20887 -49.182078
## scale(Max_MID):Temperature30 -93.52175 86.92327 -3.299244
# Model-based estimates of MEDIAN_Max over a grid of Max_MID values
# (300 to 600 in steps of 5) for every Temperature level.
om.max <- emmeans(mid_model1.1, ~ Max_MID * Temperature,
                  at = list(Max_MID = seq(from = 300, to = 600, by = 5)))
om.max.df <- as.data.frame(om.max)

# Observation-level values rebuilt as prediction + response residual.
# NOTE(review): predict() and residuals() are called without newdata, so this
# assumes the rows kept by drop_na() are exactly the rows mid_model1.1 was
# fitted to, in the same order -- confirm.
om.max.obs <- drop_na(df, Max_MID, MEDIAN_Max) |>
  mutate(Pred = predict(mid_model1.1, re.form = NA, type = "response"),
         Resid = residuals(mid_model1.1, type = "response"),
         Fit = Pred + Resid)

# Per Clutch x Temperature summary with a 95% t-interval.
# Note that `mean.max_female` holds the mean of Max_MID (the parental
# midpoint); the column name is kept because the plotting code refers to it.
# Groups with a single row have no standard deviation; their degrees of
# freedom are set to NA so the interval bounds come out as NA without qt()
# raising a "NaNs produced" warning for every such group (qt(df = 0) did).
# `.groups = "drop"` returns an ungrouped tibble and silences the summarise()
# regrouping message.
om.max.obs.summarize <- om.max.obs |>
  group_by(Clutch, Temperature) |>
  summarise(mean.max = mean(Fit, na.rm = TRUE),
            mean.max_female = mean(Max_MID, na.rm = TRUE),
            sd.max = sd(Fit, na.rm = TRUE),
            n.max = n(),
            .groups = "drop") |>
  mutate(se.max = sd.max / sqrt(n.max),
         lower.ci.max = mean.max -
           qt(1 - (0.05 / 2), replace(n.max - 1, n.max < 2, NA)) * se.max,
         upper.ci.max = mean.max +
           qt(1 - (0.05 / 2), replace(n.max - 1, n.max < 2, NA)) * se.max)
# Partial-effect plot for the parental-midpoint model, one panel per
# Temperature. The line and ribbon are the emmeans estimate and its confidence
# limits (emmean, lower.CL, upper.CL). Previously stat_smooth(method = "lm")
# refitted a straight line through the prediction grid, so its band described
# that refit rather than the model's uncertainty.
# NOTE(review): lower.CL / upper.CL are the column names emmeans printed for
# model1.1; assumed to be the same for mid_model1.1 -- confirm.
# Points are the per-clutch summaries (x = mean.max_female, which stores the
# mean Max_MID); clutches with a single row have NA interval bounds and are
# dropped from the range layer with a warning.
ggplot(data = om.max.df, aes(y = emmean, x = Max_MID)) +
  geom_ribbon(aes(ymin = lower.CL, ymax = upper.CL, fill = Temperature),
              alpha = 0.2) +
  geom_line(aes(color = Temperature)) +
  geom_pointrange(data = om.max.obs.summarize,
                  aes(y = mean.max, x = mean.max_female,
                      ymin = lower.ci.max,
                      ymax = upper.ci.max, color = Temperature),
                  alpha = 0.2) +
  facet_wrap(~Temperature) +
  theme_classic() +
  theme(legend.position = "bottom")
## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_segment()`).